{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "ename": "IndentationError",
     "evalue": "unindent does not match any outer indentation level (<ipython-input-1-8a2699ffb8cb>, line 6)",
     "output_type": "error",
     "traceback": [
      "\u001b[1;36m  File \u001b[1;32m\"<ipython-input-1-8a2699ffb8cb>\"\u001b[1;36m, line \u001b[1;32m6\u001b[0m\n\u001b[1;33m    for line in f:\u001b[0m\n\u001b[1;37m                  ^\u001b[0m\n\u001b[1;31mIndentationError\u001b[0m\u001b[1;31m:\u001b[0m unindent does not match any outer indentation level\n"
     ]
    }
   ],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'cells': [{'cell_type': 'code',\n",
       "   'execution_count': 18,\n",
       "   'metadata': {},\n",
       "   'outputs': [{'name': 'stdout',\n",
       "     'output_type': 'stream',\n",
       "     'text': [\"'周瑜'的词向量为：\\n\",\n",
       "      ' [-0.28486004 -0.10302754  0.32539868  0.927449    0.08155926  0.08009975\\n',\n",
       "      '  0.07816952  0.21600497 -0.8289922   0.13672896  0.6494508  -0.98236024\\n',\n",
       "      ' -0.33522588 -0.22463615 -0.09952935  0.38390076 -0.23069428  0.16955726\\n',\n",
       "      ' -0.56284493 -1.3821694 ]\\n',\n",
       "      \"与'周瑜'相似度最高的10个词：\\n\",\n",
       "      \"[('陆逊', 0.9333921670913696), ('袁术', 0.9324349761009216), ('钟会', 0.9308609962463379), ('孙策', 0.9300845265388489), ('孙夫人', 0.9157758355140686), ('夏侯楙', 0.9143756031990051), ('吕布', 0.9128043055534363), ('曹真', 0.9090415835380554), ('吴侯', 0.9084752202033997), ('袁绍', 0.9077554941177368)]\\n\",\n",
       "      \"'刘备'和'曹操'的相似度：0.8007414937019348\\n\",\n",
       "      \"在词'孙权/曹操/刘备/孙夫人'中,'孙夫人'与其他词不属于同一类\\n\"]}],\n",
       "   'source': ['import jieba\\n',\n",
       "    'import re\\n',\n",
       "    'from gensim.models import Word2Vec\\n',\n",
       "    'with open(r\"C:\\\\Users\\\\Administrator\\\\Desktop\\\\.ipynb_checkpoints\\\\sanguo.txt\",encoding=\\'utf-8\\') as f:\\n',\n",
       "    '    lines=[]\\n',\n",
       "    '    for line in f:\\n',\n",
       "    '        temp=jieba.lcut(line)\\n',\n",
       "    '        words=[]\\n',\n",
       "    '        for i in temp:\\n',\n",
       "    '            i = re.sub(\"[\\\\s+\\\\.\\\\!\\\\/_,$%^*(+\\\\\"\\\\\\'””《》]+|[+——！，。？、~@#￥%……&*（）：；‘]+\", \"\", i)\\n',\n",
       "    '            if len(i)>0:\\n',\n",
       "    '                words.append(i)\\n',\n",
       "    '        if len(words)>0:\\n',\n",
       "    '            lines.append(words)\\n',\n",
       "    'model=Word2Vec(lines,vector_size=20,window=2,min_count=3,epochs=7,negative=10,sg=1)\\n',\n",
       "    'print(\"\\'周瑜\\'的词向量为：\\\\n\",model.wv.get_vector(\\'周瑜\\'))\\n',\n",
       "    'print(\"与\\'周瑜\\'相似度最高的10个词：\")\\n',\n",
       "    \"print(model.wv.most_similar('周瑜',topn=10))\\n\",\n",
       "    'print(\"\\'刘备\\'和\\'曹操\\'的相似度：{}\".format(model.wv.similarity(\\'刘备\\',\\'曹操\\')))\\n',\n",
       "    'words=\"孙权 曹操 刘备 孙夫人\"\\n',\n",
       "    'print(\"在词\\'孙权/曹操/刘备/孙夫人\\'中,\\'{}\\'与其他词不属于同一类\".format(model.wv.doesnt_match(words.split())))']}],\n",
       " 'metadata': {'kernelspec': {'display_name': 'Python 3',\n",
       "   'language': 'python',\n",
       "   'name': 'python3'},\n",
       "  'language_info': {'codemirror_mode': {'name': 'ipython', 'version': 3},\n",
       "   'file_extension': '.py',\n",
       "   'mimetype': 'text/x-python',\n",
       "   'name': 'python',\n",
       "   'nbconvert_exporter': 'python',\n",
       "   'pygments_lexer': 'ipython3',\n",
       "   'version': '3.7.0'}},\n",
       " 'nbformat': 4,\n",
       " 'nbformat_minor': 2}"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# This cell's source is a single dict literal, not a script: evaluating it only\n",
    "# displays the dict as the cell's result. The literal has the top-level keys of\n",
    "# an nbformat-4 notebook ('cells', 'metadata', 'nbformat', 'nbformat_minor').\n",
    "# The jieba/gensim Word2Vec lines under 'source' are string data and are never\n",
    "# executed here; they reference a hardcoded absolute Windows path.\n",
    "# NOTE(review): presumably a saved .ipynb file's JSON was pasted in by mistake -\n",
    "# confirm; to actually run that script, put the 'source' strings in a code cell.\n",
    "{\n",
    " \"cells\": [\n",
    "  {\n",
    "   \"cell_type\": \"code\",\n",
    "   \"execution_count\": 18,\n",
    "   \"metadata\": {},\n",
    "   \"outputs\": [\n",
    "    {\n",
    "     \"name\": \"stdout\",\n",
    "     \"output_type\": \"stream\",\n",
    "     \"text\": [\n",
    "      \"'周瑜'的词向量为：\\n\",\n",
    "      \" [-0.28486004 -0.10302754  0.32539868  0.927449    0.08155926  0.08009975\\n\",\n",
    "      \"  0.07816952  0.21600497 -0.8289922   0.13672896  0.6494508  -0.98236024\\n\",\n",
    "      \" -0.33522588 -0.22463615 -0.09952935  0.38390076 -0.23069428  0.16955726\\n\",\n",
    "      \" -0.56284493 -1.3821694 ]\\n\",\n",
    "      \"与'周瑜'相似度最高的10个词：\\n\",\n",
    "      \"[('陆逊', 0.9333921670913696), ('袁术', 0.9324349761009216), ('钟会', 0.9308609962463379), ('孙策', 0.9300845265388489), ('孙夫人', 0.9157758355140686), ('夏侯楙', 0.9143756031990051), ('吕布', 0.9128043055534363), ('曹真', 0.9090415835380554), ('吴侯', 0.9084752202033997), ('袁绍', 0.9077554941177368)]\\n\",\n",
    "      \"'刘备'和'曹操'的相似度：0.8007414937019348\\n\",\n",
    "      \"在词'孙权/曹操/刘备/孙夫人'中,'孙夫人'与其他词不属于同一类\\n\"\n",
    "     ]\n",
    "    }\n",
    "   ],\n",
    "   \"source\": [\n",
    "    \"import jieba\\n\",\n",
    "    \"import re\\n\",\n",
    "    \"from gensim.models import Word2Vec\\n\",\n",
    "    \"with open(r\\\"C:\\\\Users\\\\Administrator\\\\Desktop\\\\.ipynb_checkpoints\\\\sanguo.txt\\\",encoding='utf-8') as f:\\n\",\n",
    "    \"    lines=[]\\n\",\n",
    "    \"    for line in f:\\n\",\n",
    "    \"        temp=jieba.lcut(line)\\n\",\n",
    "    \"        words=[]\\n\",\n",
    "    \"        for i in temp:\\n\",\n",
    "    \"            i = re.sub(\\\"[\\\\s+\\\\.\\\\!\\\\/_,$%^*(+\\\\\\\"\\\\'””《》]+|[+——！，。？、~@#￥%……&*（）：；‘]+\\\", \\\"\\\", i)\\n\",\n",
    "    \"            if len(i)>0:\\n\",\n",
    "    \"                words.append(i)\\n\",\n",
    "    \"        if len(words)>0:\\n\",\n",
    "    \"            lines.append(words)\\n\",\n",
    "    \"model=Word2Vec(lines,vector_size=20,window=2,min_count=3,epochs=7,negative=10,sg=1)\\n\",\n",
    "    \"print(\\\"'周瑜'的词向量为：\\\\n\\\",model.wv.get_vector('周瑜'))\\n\",\n",
    "    \"print(\\\"与'周瑜'相似度最高的10个词：\\\")\\n\",\n",
    "    \"print(model.wv.most_similar('周瑜',topn=10))\\n\",\n",
    "    \"print(\\\"'刘备'和'曹操'的相似度：{}\\\".format(model.wv.similarity('刘备','曹操')))\\n\",\n",
    "    \"words=\\\"孙权 曹操 刘备 孙夫人\\\"\\n\",\n",
    "    \"print(\\\"在词'孙权/曹操/刘备/孙夫人'中,'{}'与其他词不属于同一类\\\".format(model.wv.doesnt_match(words.split())))\"\n",
    "   ]\n",
    "  }\n",
    " ],\n",
    " \"metadata\": {\n",
    "  \"kernelspec\": {\n",
    "   \"display_name\": \"Python 3\",\n",
    "   \"language\": \"python\",\n",
    "   \"name\": \"python3\"\n",
    "  },\n",
    "  \"language_info\": {\n",
    "   \"codemirror_mode\": {\n",
    "    \"name\": \"ipython\",\n",
    "    \"version\": 3\n",
    "   },\n",
    "   \"file_extension\": \".py\",\n",
    "   \"mimetype\": \"text/x-python\",\n",
    "   \"name\": \"python\",\n",
    "   \"nbconvert_exporter\": \"python\",\n",
    "   \"pygments_lexer\": \"ipython3\",\n",
    "   \"version\": \"3.7.0\"\n",
    "  }\n",
    " },\n",
    " \"nbformat\": 4,\n",
    " \"nbformat_minor\": 2\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
